import pandas as pd
import spacy
from pathlib import Path
from emfdscore.scoring import (
    score_emfd_all_vice_virtue,  # noqa: F401
    score_mfd,  # noqa: F401
    score_mfd2,  # noqa: F401
)

# Absolute directory containing this script; the data paths used below are
# built relative to it, so they do not depend on the current working directory.
_this_file = Path(__file__).resolve()
script_dir = _this_file.parent


def _build_scoring_pipeline(scorer_name):
    """Load the ``en_core_web_sm`` model and append the tokenizer and one scorer.

    Args:
        scorer_name: Name of the spaCy component to add as the final step.

    Returns:
        The configured spaCy pipeline object.
    """
    # NOTE(review): "mfd_tokenizer" and the scorer names are presumably
    # registered with spaCy as a side effect of the emfdscore.scoring import
    # at the top of this file - confirm before removing those imports.
    nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
    nlp.add_pipe("mfd_tokenizer")
    nlp.add_pipe(scorer_name, last=True)
    return nlp


def emfd():
    """Return a pipeline ending in the ``score_emfd_all_vice_virtue`` component."""
    return _build_scoring_pipeline("score_emfd_all_vice_virtue")


def mfd():
    """Return a pipeline ending in the ``score_mfd`` component."""
    return _build_scoring_pipeline("score_mfd")


def mfd2():
    """Return a pipeline ending in the ``score_mfd2`` component."""
    return _build_scoring_pipeline("score_mfd2")


def load_documents() -> pd.DataFrame:
    """Read the manifesto corpus and keep only the text and id columns.

    Returns:
        A DataFrame with the columns ``text_en`` and ``id_for_project`` taken
        from ``../data/source/manifesto_corpus.feather`` (relative to this
        script's directory).
    """
    corpus_path = script_dir / "../data/source/manifesto_corpus.feather"
    wanted_columns = ["text_en", "id_for_project"]
    corpus = pd.read_feather(corpus_path)
    return corpus[wanted_columns]


def score_and_save_documents(df_of_corpus: pd.DataFrame) -> None:
    """Score the corpus with each dictionary pipeline and save one file per method.

    For every method ("emfd", "mfd", "mfd2") the matching pipeline is built,
    the ``text_en`` column is streamed through it, and the results are written
    to ``../data/mfd/<method>_scores.feather`` (relative to this script's
    directory) together with the ``id_for_project`` column.

    Args:
        df_of_corpus: DataFrame providing the columns ``text_en`` (texts to
            score) and ``id_for_project`` (identifier copied to the output).

    Raises:
        ValueError: If a pipeline yields a different number of results than
            there are input rows, so the ids cannot be attached row by row.
    """
    pipeline_factories = {"emfd": emfd, "mfd": mfd, "mfd2": mfd2}
    output_dir = script_dir / "../data/mfd/"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Take the ids as a plain array so they are attached by position. Assigning
    # the Series itself would align on index labels, and the freshly built
    # output frame always has a 0..n-1 index; a filtered or re-indexed corpus
    # would then end up with NaN or mismatched ids.
    project_ids = df_of_corpus["id_for_project"].to_numpy()

    for method, build_pipeline in pipeline_factories.items():
        print(f"Working on {method.upper()}")
        nlp_for_method = build_pipeline()
        # Each item yielded by the pipeline becomes one output row
        # (presumably a mapping of score names to values - confirm against
        # the emfdscore scoring components).
        output_df = pd.DataFrame(list(nlp_for_method.pipe(df_of_corpus["text_en"])))
        output_df["id_for_project"] = project_ids
        output_df.to_feather(output_dir / f"{method}_scores.feather")


if __name__ == "__main__":
    # Script entry point: load the corpus once, then score it with every method.
    corpus_df = load_documents()
    score_and_save_documents(corpus_df)
